
#---------------------------------------------------------------------------------------#
#---------------------------------------------------------------------------------------#
# Nicholas Garvin 16 Aug 2018. Contact garvinn@rba.gov.au or nick.j.garvin@gmail.com
#    for enquiries or comments.
#
# The following code is used to generate the figures and tables in Garvin (2018). These 
#   functions are imported by the script 'Garvin (2018) tables etc RUN CODE.R'. That 
#   script returns the data behind the tables and charts. Readers will not be able to run
#   the code because it is written around confidential datasets not available to the 
#   reader. 
#
# The last section of the script 'Garvin (2018) tables etc RUN CODE.R' provides dummy
#   examples of some of the objects used by these functions, such as the 'data.objects'
#   list, which contains the algorithm output. Other objects (such as the 'iboc' dataset 
#   and the 'APRA.aggs.F' function) are not provided.
#---------------------------------------------------------------------------------------#

# Creates a data frame of repo positions from the algorithm output
repo.df.F <- function(year, stt=c(), cr=c(), data.objects, years, secprices=c(), 
                      neg=FALSE, allvars=TRUE) {
  y <- data.objects[[which(years %in% year)]]
  #stop()
  if(allvars) ids <- y[[2]]
  if(allvars) secids <- y[[3]]
  y <- y[[1]]
  int.bnds <- c(0, 0.15)  # Bounds used by the uniroot function to find interest rates.
  twotr <- rbind(y[[3]][[1]], y[[3]][[2]])
  twotr <- twotr[apply(twotr, 1, function(x) 
    !any(y[[2]]$Snd[x[1]] == y[[2]]$Snd[x[2]])), ]  # Remove if same Snd and Rec
  mltrepos <- y[[4]][[1]]
 
  mltrepos <- mltrepos[unlist(lapply(mltrepos, function(x) 
    !any(y[[2]]$Snd[abs(x)] == y[[2]]$Rec[abs(x)])))]  # Remove if same Snd and Rec
  
  repos.L <- c(lapply(1:nrow(twotr), function(x) sort(twotr[x, ])), mltrepos)
  
  # The resulting dataframe has different columns for diferent transactions. The 2/3/etc
  #   refers to the transaction number. This code is specific to tr.cap <= 6.
  colnms <- c('ISIN', 'ST', 'lnd', 'brw', 'FV', 'cons', 'mat', 'simpint', 'compint',
              'trnsno', 'crdec',
              'ST2', 'pyr2', 'FV2', 'cons2', 'ST3', 'pyr3', 'FV3', 'cons3', 
              'ST4', 'pyr4', 'FV4', 'cons4','ST5', 'pyr5', 'FV5', 'cons5', 
              'ST6', 'pyr6', 'FV6', 'cons6')
  # Forms all the variables for a single repo
  fn <- function(x) {
    c.int.slv.F <- function(r, cnsds, ST) sum(cnsds*((1 + (r/365))^(ST[1] - ST)))
    s.int.slv.F <- function(r, cnsds, ST) sum(1/(1+(r/365)*(ST - ST[1]))*cnsds)
    mtr <- y[[2]][abs(repos.L[[x]]), ]
    mtr <- mtr[order(mtr$ST), ]
    cnsds <- mtr$Cons*((mtr$Snd == mtr$Snd[1])*2 - 1)
    vec <- c(mtr$ST[1], mtr$Rec[1], mtr$Snd[1], mtr$FV[1], mtr$Cons[1], 
             floor(mtr$ST[nrow(mtr)]) - floor(mtr$ST[1]), 
             100*uniroot(s.int.slv.F, int.bnds, cnsds, floor(mtr$ST), 
                         extendInt='yes', tol=1e-10)$root, 
             100*uniroot(c.int.slv.F, int.bnds, cnsds, floor(mtr$ST), 
                         extendInt='yes', tol=1e-10)$root, 
             nrow(mtr), (sum(floor(mtr$ST[1]):(floor(mtr$ST[nrow(mtr)]) - 1) %in% 
                               cr$date[cr$decision == 1]) > 0)*1)
    for(i in 2:nrow(mtr)) vec <- c(vec, mtr$ST[i], mtr$Rec[i], mtr$FV[i], mtr$Cons[i])
    head(c(vec, rep(NA, length(colnms))), length(colnms) - 1)
  }
  repos.M <- do.call(rbind, lapply(1:length(repos.L), fn))  # Runs fn for all repos.
  isins <- unlist(lapply(1:length(repos.L), 
                         function(x) y[[2]]$ISIN[abs(repos.L[[x]][1])]))
  repos.M <- data.frame(isins, repos.M)
  colnames(repos.M) <- colnms
  # Return the data so far if allvars=FALSE
  if(!allvars) return(cbind(year=year, repos.M))
  # Add in lender and borrower IDs and security types
  lndent <- ids[match(repos.M$lnd, ids[, 'acnt']), 'name']
  brwent <- ids[match(repos.M$brw, ids[, 'acnt']), 'name']
  aplnd <- ids[match(repos.M$lnd, ids[, 'acnt']), 'name']
  sec <- rep(NA, nrow(repos.M))
  sec[secids[match(repos.M$ISIN, secids[, 'ISIN']), 'code'] == 'COMM'] <- 'ags'
  sec[secids[match(repos.M$ISIN, secids[, 'ISIN']), 'code'] %in% stt] <- 'sgs'
  sec[is.na(sec)] <- 'oth'
  # Add in security prices
  secpr <- secprices$PriceMid[match(paste(repos.M$ISIN, floor(repos.M$ST)), 
                                    paste(secprices$ISIN, secprices$AsAt))]
  days <- unique(floor(repos.M$ST))
  sdlag <- days[match(floor(repos.M$ST), c(days[2:length(days)], NA))]
  secprlag <- secprices$PriceMid[match(paste(repos.M$ISIN, sdlag), 
                                       paste(secprices$ISIN, secprices$AsAt))]
  # Calculate haircuts
  hct <- (secpr/100)*(-repos.M$FV)/repos.M$cons - 1
  # Spread to cash rate
  if(neg) sprcr <- -cr$Cash.Rate.Target else sprcr <- cr$Cash.Rate.Target
  sprdsimp <- repos.M$simpint - sprcr[match(floor(repos.M$ST), cr$date)] 
  # Now spreads for the repos spanning policy changes
  cr2 <- cr$date[1]:cr$date[nrow(cr)]  # Add weekends etc to cash rate data
  cr2 <- cbind(cr2, cr$Cash.Rate.Target[match(cr2, cr$date)])
  for(i in 1:3) cr2[is.na(cr2[, 2]), 2] <- cr2[which(is.na(cr2[, 2])) - i, 2] 
  opclds <- cbind(floor(repos.M$ST), floor(repos.M$ST) + repos.M$mat)
  fn <- function(x) {
    repos.M$simpint[x] - mean(cr2[match(opclds[x, 1], 
                                        cr2[, 1]), 2]:cr2[match(opclds[x, 2], cr2[, 1]), 2])
  }
  for(i in which(repos.M$crdec == 1)) sprdsimp[i] <- fn(i)
  
  cbind(year, lndent, brwent, sec, secpr, hct, sprdsimp, repos.M)
}
# Figure 1
outst.overnight.chart.F <- function(repo, iboc) {
  x <- repo[!(nchar(as.character(repo$lndent)) == 5 | 
                nchar(as.character(repo$brwent)) == 5), ]  # Remove state govt repos
  ds <- unique(c(floor(repo$ST), floor(repo$ST2)))  # Days
  emat <- match(floor(x$ST) + x$mat, ds) - match(floor(x$ST), ds)  # Busns day maturity
  x <- x[emat == 1, ]  # Keeps Friday to Monday and overnight repos
  x$ST <- floor(x$ST)
  x <- aggregate(cons ~ ST, data=x, sum)
  x <- cbind(x, iboc=1e6*iboc$unsecured[match(x$ST, iboc$date)])
  x[, c('cons', 'iboc')] <- x[, c('cons', 'iboc')]/1e9
  x
}
# Tables 1 and 2
stats.table.F <- function(year, data.objects, years, 
                          whichtable) {  # This is 1 or 2 depending on which table
  # Pull in the repo data
  y <- data.objects[[which(years %in% year)]][[1]]
  
  if(whichtable == 1) {  # The first table
    rownames1 <- c('total', 'twotr', 'unique', 'multtr', 'iter', 'cvleq22', 
                   'cvleq45', 'maxcv', 'intra', 'trtot', 'repoprop')
    stats1 <- vector('list', length(rownames1))
    names(stats1) <- rownames1
    mltrepos <- y[[4]][[1]]
    stats1$total <- max(nrow(y[[3]][[1]]), 0) + max(nrow(y[[3]][[2]]), 0) + 
      length(mltrepos)
    stats1$twotr <- max(nrow(y[[3]][[1]]), 0) + max(nrow(y[[3]][[2]]), 0) + 
      sum(unlist(lapply(mltrepos, length) == 2))
    stats1$unique <- max(nrow(y[[3]][[1]]), 0)
    stats1$multtr <- stats1$total - stats1$twotr
    stats1$iter <- sum(y[[4]][[2]][, 2] == -1)
    stats1$cvleq22 <- round(sum(y[[4]][[2]][, 1] <= 22)/nrow(y[[4]][[2]])*100, 1)
    stats1$cvleq45 <- round(sum(y[[4]][[2]][, 1] <= 45)/nrow(y[[4]][[2]])*100, 1)
    stats1$maxcv <- max(y[[4]][[2]][, 1])
    stats1$intra <- length(y[[1]])
    stats1$trtot <- sum(y[[2]]$Cons > 0 & y[[2]]$Snd != y[[2]]$Rec)
    repotrns <- 2*(stats1$intra + stats1$twotr) + length(unlist(y[[4]][[1]]))
    stats1$repoprop <- round(repotrns/stats1$trtot*100, 1)
    return(stats1) 
  }
  if(whichtable == 2) {  # The second table
    rownames2 <- c('thrtr', 'fourtr', 'fivetr', 'sixtr', 'prtpay', 'prtln', 'cltmv')
    stats2 <- vector('list', length(rownames2))
    names(stats2) <- rownames2
    tbl <- table(unlist(lapply(y[[4]][[1]], length)))
    stats2$thrtr <- unname(tbl[names(tbl) == 3])
    stats2$fourtr <- unname(tbl[names(tbl) == 4])
    stats2$fivetr <- unname(tbl[names(tbl) == 5])
    stats2$sixtr <- unname(tbl[names(tbl) == 6])
    stats2$prtpay <- sum(lapply(y[[4]][[1]], function(x) sum(x < 0)) > 1) 
    stats2$prtln <- sum(lapply(y[[4]][[1]], function(x) sum(x > 0)) > 1) 
    stats2$cltmv <- sum(lapply(y[[4]][[1]], 
                               function(x) sum(y[[2]]$Cons[abs(x)] == 0)) > 0)
    return(stats2)
  }
}
# Table 3
table.flspos.F <- function(year, repo.fp, table1) {
  
  y <- fls.det.objects[[which(c(2006, 2008:2010, 2012:2015) %in% year)]][[1]]
  rownames <- c('total', 'twotr', 'multtr', 'twofls', 'mltfls', 'twotot', 'mlttot')
  stats <- vector('list', length(rownames))
  names(stats) <- rownames
  # Total false detections
  stats$twofls <- sum(repo.fp$trnsno[repo.fp$year == year] == 2)
  stats$mltfls <- sum(repo.fp$trnsno[repo.fp$year == year] > 2)
  # Total actual detections
  stats$twotot <- table1[, colnames(table1) == year]$twotr
  stats$mlttot <- table1[, colnames(table1) == year]$multtr
  # Proportions
  stats$total <- round(100*(stats$twofls + stats$mltfls)/(stats$twotot + stats$mlttot), 
                       2)
  stats$twotr <- round(100*stats$twofls/stats$twotot, 2)
  stats$multtr <- round(100*stats$mltfls/stats$mlttot, 2)
  stats
}
# Figure 2
flspos.chartdata.F <- function(repo.df, repo.fp, twotr.only=FALSE, logscale=FALSE) {
  std <- round(repo.df$sprdsimp, 1)
  fps <- round(repo.fp$sprdsimp, 1)
  if(twotr.only) {
    std <- std[repo.df$trnsno < 3]
    fps <- fps[repo.fp$trnsno < 3]
  }
  ch <- cbind(std=table(std[std >= -1 & std <= 1]), fps=table(fps[fps >= -1 & fps <= 1]))
  if(logscale) ch[, 1:2] <- log10(ch[, 1:2])
  ch
}
# Table 4
rndint.table.F <- function(year, repo) { 
  repo <- repo[repo$year %in% year & repo$crdec == 0, ]
  rnd3 <- round(repo$simpint, 4)
  rnd2 <- round(repo$simpint, 2)
  y <- c(sum(rnd3 != rnd2), sum(rnd3 != rnd2 & repo$trnsno == 2), 
         sum(rnd3 != rnd2 & repo$trnsno > 2))
  c(100*y[1]/nrow(repo), 100*y[2]/sum(repo$trnsno == 2), 100*y[3]/sum(repo$trnsno > 2))
}
# Table 5
fls.om.F <- function(year, cond.type, mat.cap, interest.bounds, stt, icsds, cr, 
                     data.objects) {
  # cond.type is one of: 'id', 'sec', 'zeroint' or 'FVint'
  int.rng <- 1 + interest.bounds / 365
  
  y <- data.objects[[which(c(2006, 2008:2010, 2012:2015) %in% year)]]
  
  ids <- y[[2]]
  secids <- y[[3]]
  y <- y[[1]]
  
  # Remove detected two-trns and intraday repos from transactions data
  trns <- y[[2]][-unlist(c(y[[3]][[1]], y[[3]][[2]], y[[1]])), ]
  trns2 <- trns
  
  if(cond.type == 'id') {
    # Insert the generalised entity IDs
    trns$Snd <- ids[match(trns$Snd, ids[, 'acnt']), 'name']
    trns$Rec <- ids[match(trns$Rec, ids[, 'acnt']), 'name']
    trns <- trns[trns$Snd != trns$Rec & !is.na(trns$Snd) & !is.na(trns$Rec), ]
  }
  if(cond.type == 'sec') {
    # Insert generalised security types
    sec <- rep(NA, nrow(trns))
    sec[secids[match(trns$ISIN, secids[, 'ISIN']), 'code'] == 'COMM'] <- 'ags'
    sec[secids[match(trns$ISIN, secids[, 'ISIN']), 'code'] %in% stt] <- 'sgs'
    sec[is.na(sec)] <- 'oth'
    trns$ISIN <- sec
  }
  if(cond.type == 'zeroint') int.rng <- c(1, 1)
  # Run the algorithm
  prs.L <- ovrlp.2trns.F(trns, int.rng, mat.cap, intraday=NULL)
  prlctn.2tr <- det.2trns.F(prs.L, trns)  # List of 2-element vectors (matched pairs)
  prlctn.2tr <- rbind(prlctn.2tr[[1]], prlctn.2tr[[2]])
  # Put it into a matrix
  repo.fo <- cbind(trns[prlctn.2tr[, 1], ], ST2=trns[prlctn.2tr[, 2], ]$ST, 
                   Cons2=trns[prlctn.2tr[, 2], ]$Cons) 
  repo.fo <- cbind(repo.fo, mat=floor(repo.fo$ST2) - floor(repo.fo$ST))
  repo.fo <- cbind(repo.fo, 
                   sint=round(365*((repo.fo$Cons2/repo.fo$Cons - 1)/repo.fo$mat), 6))
  repo.fo <- repo.fo[repo.fo$mat != 0, ]  # Remove intraday
  ### Check if any trns appear in mult-trns repos
  mlt.dup <- sum(c(prlctn.2tr) %in% abs(unlist(y[[4]][[1]])))
  ### Check overlapping cash rate days
  fn <- function(x) {
    any(floor(repo.fo$ST[x]):(floor(repo.fo$ST2[x]) - 1) %in% cr$date[cr$decision == 1])
  }
  if(nrow(repo.fo) > 0) {
    repo.fo <- cbind(repo.fo, crdec=unlist(lapply(1:nrow(repo.fo), fn))*1)
  }
  
  vars <- c()
  if(cond.type == 'id') {
    # how many aggregated IDs there are, how many AC IDs there were
    vars <- c(length(unique(c(as.character(trns$Snd), as.character(trns$Rec)))), 
              length(unique(c(trns2$Snd[trns2$Snd %in% ids[, 'acnt']], 
                              trns2$Rec[trns2$Rec %in% ids[, 'acnt']]))))
  }
  # How many new detections were 'other
  if(cond.type == 'sec') vars <- sum(repo.fo$ISIN != 'oth')
  # How many involved an icsd account - HSBC client or CITI client
  if(cond.type == 'zeroint') {
    vars <- sum(ids[match(repo.fo$Snd, ids[, 'acnt']), 'name'] %in% icsds)
  }
  
  # Variables to return: New detections, new detections excl policy dec days, 
  # how many of these have rounded interest rates, how many in mlt trns repos.
  # Plus idiosyncratic variables.
  c(nrow(repo.fo), sum(repo.fo$crdec == 0), 
    sum(repo.fo$crdec == 0 & repo.fo$sint == round(repo.fo$sint, 4)), mlt.dup, vars)
}
fls.om.table.F <- function(years, mat.cap, tr.cap, stt, int.bnds, icsds, cr, 
                           data.objects) {
  fo.id <- c()
  for(y in 1:length(years)) fo.id <- 
      cbind(fo.id, fls.om.F(years[y], c('id'), mat.cap, 
                            c(int.bnds[y, 'lb'], int.bnds[y, 'ub']), 
                            stt, icsds, cr, data.objects))
  fo.sec <- c()
  for(y in 1:length(years)) fo.sec <- 
    cbind(fo.sec, fls.om.F(years[y], c('sec'), mat.cap, 
                           c(int.bnds[y, 'lb'], int.bnds[y, 'ub']), 
                           stt, icsds, cr, data.objects))
  fo.zint <- c()
  for(y in 1:length(years)) fo.zint <- 
    cbind(fo.zint, fls.om.F(years[y], c('zeroint'), mat.cap, 
                            c(int.bnds[y, 'lb'], int.bnds[y, 'ub']), 
                            stt, icsds, cr, data.objects))
  fo.table <- rbind(fo.id, fo.sec, fo.zint[c(1, 4, 5), ])
  colnames(fo.table) <- years
  rn <- c('gen ID det', 'det w crdc=0', 'det w rnd int & crdc=0', 'also in mlt repos',
          'no of gen IDs', 'no of norm IDs',
          'gen sec det', 'det w crdc=0', 'det w rnd int & crdc=0', 'also in mlt repos',
          'det with AGS or SGS',
          'zero int det', 'also in mlt repos', 'det excl icsds')
  rownames(fo.table) <- rn
  fo.table
}
twoisin.repos.F <- function(years, int.bnds, data.objects) {
  # Function for finding repos that fit conditions. Returns boolean for each detection
  # x is a four-transaction detected repo
  fn <- function(x, trns) {
    options(scipen=999)
    x <- x[order(abs(x))]
    if(all(x[1:2] > 0) & all(x[3:4] < 0)) {
      y <- floor(trns$ST[abs(x)[1]]) == floor(trns$ST[abs(x)[2]]) & 
        floor(trns$ST[abs(x)[3]]) == floor(trns$ST[abs(x)[4]]) &
        trns$ISIN2[abs(x)[1]] != trns$ISIN2[abs(x)[2]] &
        paste(trns$ISIN2[abs(x)[1]], trns$FV[abs(x)[1]]) %in% 
        paste(trns$ISIN2[abs(x)[3:4]], trns$FV[abs(x)[3:4]]) & 
        paste(trns$ISIN2[abs(x)[2]], trns$FV[abs(x)[2]]) %in% 
        paste(trns$ISIN2[abs(x)[3:4]], trns$FV[abs(x)[3:4]])
    } else y <- FALSE
    y
  }
  # Now apply it and create a vector of each year
  dets <- rep(NA, length(years))
  for(i in 1:length(years)) {
    year <- years[i]
    # Pull in required data sets
    y <- data.objects[[which(c(2006, 2008:2010, 2012:2015) %in% year)]]
    trns <- y[[1]][[2]]
    matched <- c(unlist(y[[1]][[1]], y[[1]][[3]]))
    hld <- y[[3]]
    rm(y)
    # Reformat transactions data
    sec <- hld[match(trns$ISIN, hld[, 'ISIN']), 'code']
    # 'COMM' is the issuer code for AGS
    trns <- trns[sec == 'COMM', ]
    trns <- cbind(trns, ISIN2=trns$ISIN)
    levels(trns$ISIN) <- c(levels(trns$ISIN), 'COMM')
    trns$ISIN <- 'COMM'
    # Detect four-transaction repos
    int.rng <- 1 + int.bnds[int.bnds[, 1] == year, c('lb', 'ub')] / 365
    repos <- mult.trns.F(trns, matched, iter.mthd=TRUE, matrix.max=1, iter.max=45, 
                              mat.cap=14, tr.cap=4, int.rng)
    repos4 <- repos[[1]][lapply(repos[[1]], length) == 4]
    # Apply the above 'fn' function to look for multiple ISIN repos
    poss <- unlist(lapply(repos4, fn, trns))
    dets[i] <- sum(poss)
  }
  dets
}
# Table 6
APRA.alg.reg.F <- function(repo, apra.incl, apra.excl, years, sepends, repent, 
                           twotronly=FALSE) {
  
  if(twotronly) repo <- repo[repo$trnsno == 2, ]
  repo2 <- cbind(lndaln=repent$alignID[match(repo$lnd, repent$repoID)],  # Aligned IDs
                 brwaln=repent$alignID[match(repo$brw, repent$repoID)] , repo)
  apra <- APRA.aggs.F(apra.incl, apra.excl, years, sepends)[[1]]  # Generate APRA data
  apra <- cbind(alnID=repent$alignID[match(apra$ent, repent$apraID)], apra)
  # Format the repo data
  cnsds <- repo2[, grepl('cons', colnames(repo2))]  # Get transaction considerations
  cnsds <- cbind(cnsds[, 1], 
                 ((repo2[, grepl('pyr', colnames(repo2))] == repo2$lnd)*2 - 1)*
                   cnsds[, -1])  # Make cons neg if a repayment
  sds <- floor(repo2[, grepl('ST', colnames(repo2))])  # Take the settlement days
  qes <- sepends[match(repo2$year, years)]  # Quarter ends
  qendrepos <- do.call(rbind, lapply(1:nrow(sds), function(x) 
    range(as.numeric(sds[x, ]), na.rm=TRUE)))  # Work out which repos overlap q ends
  qendrepos <- which(qendrepos[, 1] <= qes & qendrepos[, 2] > qes)
  cnsout <- rep(0, nrow(repo2))  # Store each repo's outstanding consideration at q end
  for(i in qendrepos) cnsout[i] <- sum(cnsds[i, 1:max(which(sds[i, ] <= qes[i]), 
                                                      na.rm=TRUE)])
  repo2 <- cbind(cnsout, qes, repo2)
  # Aggregate by lender and borrower - ensure NAs don't affect sums
  repo2$lndaln <- as.factor(ifelse(is.na(repo2$lndaln), 'other',
                                   as.character(repo2$lndaln)))  # Converting NAs
  repo2$brwaln <- as.factor(ifelse(is.na(repo2$brwaln), 'other',
                                   as.character(repo2$brwaln)))  # Converting NAs
  r.lndag <- aggregate(cnsout ~ lndent + lndaln + qes + sec, data=repo2, sum)
  r.brwag <- aggregate(cnsout ~ brwent + brwaln + qes + sec, data=repo2, sum)
  # Keep only entities that are present in repo data that year
  apra <- apra[paste(apra$date, apra$alnID) %in% 
                 c(paste(r.lndag$qes, r.lndag$lndln), 
                   paste(r.lndag$qes, r.brwag$brwaln)), ]
  # Format APRA data into long shape. Won't work if apra column ordering has changed
  apcn <- colnames(apra)
  apsec <- c(rep('ags', nrow(apra)), rep('sgs', nrow(apra)), rep('oth', nrow(apra)))
  apralng <- rbind(setNames(apra[, c(1:4, 7)], c(apcn[1:3], 'lnd', 'brw')),
                   setNames(apra[, c(1:3, 5, 8)], c(apcn[1:3], 'lnd', 'brw')),
                   setNames(apra[, c(1:3, 6, 9)], c(apcn[1:3], 'lnd', 'brw')))
  apralng <- cbind(apralng, sec=apsec)
  # Generate the regression data
  # Use entities from apra data that appear in repo data that year
  cor.data <- expand.grid(unique(apra$date), unique(apra$alnID), c('ags', 'sgs', 'oth'))
  alglnd <- r.lndag$cnsout[match(do.call(paste, cor.data), 
                                 do.call(paste, r.lndag[, c('qes', 'lndaln', 'sec')]))]
  algbrw <- r.brwag$cnsout[match(do.call(paste, cor.data), 
                                 do.call(paste, r.brwag[, c('qes', 'brwaln', 'sec')]))]
  aprlnd <- as.numeric(as.character(apralng$lnd[match(do.call(paste, cor.data), 
                             do.call(paste, apralng[, c('date', 'alnID', 'sec')]))]))
  aprbrw <- as.numeric(as.character(apralng$brw[match(do.call(paste, cor.data), 
                             do.call(paste, apralng[, c('date', 'alnID', 'sec')]))]))
  cor.data <- cbind(cor.data, alglnd, algbrw, aprlnd, aprbrw)
  colnames(cor.data) <- c('date', 'ent', 'sec', 'alglnd', 'algbrw', 'aprlnd', 'aprbrw')
  # Return the regression and repo data
  list(cor.data, repo2)
}
cor.reg.F <- function(repo, apra.incl, apra.excl, sec.incl, lndbrw, twotronly, 
                      years, sepends, repent, lvl=c('full'), pos.only=TRUE) {
  # lvl is 'entity' or 'year' to collapse the data into less granularity
  cor.data <- APRA.alg.reg.F(repo, apra.incl, apra.excl, years, sepends, repent, 
                             twotronly)[[1]]
  if(lndbrw == 'lnd') {
    data <- cor.data[, c(1:4, 6)]
  } else {
    data <- cor.data[, c(1:3, 5, 7)]
  }
  colnames(data) <- c('date', 'ent', 'sec', 'alg', 'apr')
  
  data <- data[data$sec %in% sec.incl & !is.na(data$alg) & !is.na(data$apr), ]
  if(lvl == 'entity') data <- aggregate(cbind(alg, apr) ~ ent + sec, data=data, sum)
  if(lvl == 'year') data <- aggregate(cbind(alg, apr) ~ date + sec, data=data, sum)
  if(pos.only) data <- data[data$alg > 0 & data$apr > 0, ]
  data[, c('alg', 'apr')] <- data[, c('alg', 'apr')]/1e9
  regout <- summary(lm(apr ~ alg, data=data))
  if(regout$coef[2, 1] >= 1) {
    pH1 <-2*(1 - pt((regout$coef[2, 1] - 1)/regout$coef[2, 2], regout$df[2]))
  } else {
    pH1 <-2*(pt((regout$coef[2, 1] - 1)/regout$coef[2, 2], regout$df[2]))
  }
  outvec <- round(c(regout$coef[1, -3], regout$coef[2, -3], pH1,
                    regout$r.squared, sqrt(regout$r.squared), nrow(data)), 3)
  outvec
}  
# Calculates bilateral outstanding positions each day. Discounts each repo payment 
#   to the first day of that repo using that repo's interest rate.
gross.bi.pos.F <- function(year, repo) {
  repo <- repo[repo$year == year, ]
  repo <- repo[as.character(repo$brwent) != as.character(repo$lndent), ]
  cps <- paste(repo$lndent, repo$brwent)
  dys <- sort(unique(floor(unlist(c(repo[, grepl('ST', colnames(repo))])))))
  dys <- dys[!is.na(dys)]
  cns <- repo[, grepl('cons', colnames(repo))]
  sds <- floor(repo[, grepl('ST', colnames(repo))])
  dir <- (matrix(rep(repo$lnd, 5), nrow=nrow(repo)) == 
            repo[, grepl('pyr', colnames(repo))])*2 - 1  # dirn of each trnsn
  dir <- cbind(1, dir)  # dir has a column for each transaction
  # For each repo, discounted value outstanding each day
  fn <- function(x) {  # x is a row number of repo data
    x1 <- as.numeric(dir[x, ]*cns[x, ]/  # Discount the trnsns
                       (1 + repo$simpint[x]*(sds[x, ] - sds[x, 1])/36500))
    x1 <- as.numeric(x1[!is.na(x1)])  # Remove nas
    sd <- as.numeric(sds[x, !is.na(sds[x, ])])  # Store the settlement days
    x2 <- aggregate(x1, list(sd), sum)  # Sum trnsns on same day
    out <- rep(0, length(dys))  # Store in vector of values across all days
    out[dys %in% x2$Group.1] <- x2$x  
    round(cumsum(out), 0)  # Cumulate the trnsns
  }
  outst <- do.call(rbind, lapply(1:nrow(repo), fn))
  outst <- aggregate(outst, list(cps, repo$sec), sum)  # Collapse repos to CP pairs
  outst <- cbind(do.call(rbind, strsplit(outst[, 1], ' ')), outst)
  colnames(outst) <- c('lnd', 'brw', 'lndbrw', 'sec', dys)
  outst
}
# Figure 4
outst.chart.F <- function(years, repo, trunc) {
  outfl <- lapply(years, gross.bi.pos.F, repo)  # Entity-level outstanding posns
  fn <- function(x) {
    # Strip out unused columns before aggregating
    x <- x[, !colnames(x) %in% c('lnd', 'brw', 'lndbrw')]
    y <- aggregate(.~sec, data=x, sum)  # Aggregate each day (col) by security
    y[!is.na(y[, 1]), ]
  }
  out <- do.call(cbind, lapply(outfl, fn))  # Apply to each year and combine
  mean.out <- matrix(0, nrow=length(years), ncol=3)
  colnames(mean.out) <- out[1:3, 1]
  for(y in 1:length(years)) {  # For each year average across days
    for(c in 1:3) {
      mean.out[y, c] <- mean(unlist(out[c, colnames(out) %in% 
                                          trunc[y, 1]:trunc[y, 2]]))
    }
  }
  mean.out <- mean.out/1e9
  return(mean.out)
}
# Table 7
issuer.table.F <- function(repo, rowno, years, data.objects) {
  # Collect issuer types
  clttyp <- vector('list', length(years))
  for(y in 1:length(years)) clttyp[[y]] <- data.objects[[y]][[3]]
  clttyp <- do.call(rbind, clttyp)  # Put into one data frame
  clttyp <- clttyp[!duplicated(clttyp), ]  # Remove duplicates
  x <- repo  # Align repo data with holdings.
  x <- cbind(iss=clttyp[match(x$ISIN, clttyp[, 'ISIN']), 'code'], x)
  tbl <- table(x$iss)[table(x$iss) > 0]  # Count the repos for each issuer
  tbl <- sort(tbl, decreasing=TRUE)
  tbl <- tbl[1:rowno]
  # Pull in the cps and an example ISIN
  cps <- matrix(NA, nrow=rowno, ncol=3)
  colnames(cps) <- c('brws', 'lnds', 'egISIN')
  for(r in 1:rowno) {
    cps[r, 1] <- length(unique(x$brw[x$iss == rownames(tbl)[r]]))
    cps[r, 2] <- length(unique(x$lnd[x$iss == rownames(tbl)[r]]))
    cps[r, 3] <- tail(as.character(x$ISIN[x$iss == rownames(tbl)[r]]), 1)
  }
  tbl <- cbind(tbl, cps)
  as.data.frame(tbl)
}
# Table 8
isin.use.table.F <- function(year, repo, data.objects) {
  # Pull in issuer code data
  hld <- data.objects[[which(c(2006, 2008:2010, 2012:2015) %in% year)]][[3]]
  hld <- hld[hld[, 'code'] == 'COMM', ]
  x <- repo[repo$year == year & repo$trnsno == 2, ]
  x <- cbind(iss=hld[match(x$ISIN, hld[, 'ISIN']), 'code'], x)
  # Count use in repos
  freq <- table(x$ISIN[x$sec == 'ags'])[table(x$ISIN[x$sec == 'ags']) > 0]
  tbl <- hld[!duplicated(hld[, 'ISIN']), c('ISIN', 'subclass')]  # Align with bond type
  tbl <- cbind(tbl, repos=freq[match(tbl[, 'ISIN'], names(freq))])
  tbl[is.na(tbl[, 'repos']), 'repos'] <- 0
  agstyp <- c('TB', 'TI', 'TN')
  tbl2 <- matrix(NA, nrow=4, ncol=3)  # Smaller table of summary stats
  rownames(tbl2) <- c('Number of ISINs on issue', 'Least use of an ISIN', 
                      'Median use across ISINs', 'Most use of an ISIN')
  colnames(tbl2) <- c('Bonds', 'Indexed bonds', 'Notes')
  for(t in 1:3) {  # Loop across bond types
    subset <- tbl[tbl[, 'subclass'] == agstyp[t], ]
    repo.freq <- as.numeric(subset[, 'repos'])
    tbl2[1, t] <- nrow(subset)  # Number of ISINs
    tbl2[2:4, t] <- summary(repo.freq)[c(1, 3, 6)]  # Least, median and most
  }
  tbl2
}
# Figures 5 and 12 (output tailored to our graphing software)
sprdtime.chart.F <- function(year, repo, variable) {
  # Variable is 'sprdsimp' or 'hct'
  if(variable == 'hct') repo <- repo[!is.na(repo$hct), ]
  repo <- repo[repo$year == year, ]
  sds <- sort(unique(floor(repo$ST)))
  if(variable == 'sprdsimp') repo <- repo[repo$crdec == 0, ]  # remove policy decisions
  STorder <- match(floor(repo$ST), sds) + (repo$ST - floor(repo$ST))  # Create x scale
  ags <- cbind(date=STorder, sprd=round(100*repo[, variable], 2))[repo$sec == 'ags', ]
  sgs <- cbind(date=STorder, sprd=round(100*repo[, variable], 2))[repo$sec == 'sgs', ]
  if(variable == 'sprdsimp') {
    oth <- cbind(date=STorder, sprd=round(100*repo$sprdsimp, 2))[repo$sec == 'oth', ]
  }
  if(variable == 'sprdsimp') colno <- 6 else colno <- 4
  output <- matrix('', nrow=nrow(repo), colno)  # Separate non-overlapping columns for
  output[1:nrow(ags), 1:2] <- ags               #   each series.
  output[(nrow(ags) + 1):(nrow(ags) + nrow(sgs)), 3:4] <- sgs
  if(variable == 'sprdsimp') output[(nrow(ags) + nrow(sgs) + 1):nrow(repo), 5:6] <- oth
  output
}
# Figure 6
annual.sprd.chart.F <- function(years, repo, htpair, overnight=FALSE, av=FALSE) {
  # av=TRUE: weighted average instead of median. overnight=TRUE: overnight loans only
  x <- repo[repo$crdec == 0 & !(repo$lndent == htpair['lnd'] & 
                                  repo$brwent == htpair['brw']), ]
  if(overnight) {
    ds <- unique(c(floor(x$ST), floor(x$ST2)))
    emat <- match(floor(x$ST) + x$mat, ds) - match(floor(x$ST), ds)
    x <- x[emat == 1, ]
  }
  x <- cbind(x, sprcons=x$sprdsimp*x$cons)  # Spread*cons for weighted average spread
  x2 <- aggregate(cbind(sprcons, cons) ~ year + sec, data=x, sum)
  x2 <- cbind(x2, spr=x2$sprcons/x2$cons)
  x2$spr <- round(100*x2$spr, 2)
  avsprds <- matrix(NA, nrow=length(years), ncol=3)
  secs <- c('ags', 'sgs', 'oth')
  colnames(avsprds) <- secs
  rownames(avsprds) <- years
  for(i in 1:length(years)) for(s in 1:3) avsprds[i, s] <- 
    x2$spr[x2$year == years[i] & x2$sec == secs[s]]
  # Now the medians
  x1 <- aggregate(sprdsimp ~ year + sec, data=x, median)
  x1$sprdsimp <- round(100*x1$sprdsimp, 2)
  medsprds <- matrix(NA, nrow=length(years), ncol=3)
  secs <- c('ags', 'sgs', 'oth')
  colnames(medsprds) <- secs
  rownames(medsprds) <- years
  for(i in 1:length(years)) for(s in 1:3) medsprds[i, s] <- 
    x1$sprdsimp[x1$year == years[i] & x1$sec == secs[s]]
  if(av) return(avsprds) else return(medsprds)
}
# Figure 7
yld.crv.chart.F <- function(repo, htpair, av=FALSE) {
  # av=TRUE gives weighted averages instead of medians
  x <- repo[repo$year >= 2012 & repo$trnsno == 2 &
              !(repo$lndent == htpair['lnd'] & repo$brwent == htpair['brw']), ] 
  x <- cbind(x, sprcons=x$sprdsimp*x$cons)  # First do averages.
  x3 <- aggregate(cbind(sprcons, cons) ~ year + sec + mat, data=x, sum)
  x3 <- cbind(x3, sprd=x3$sprcons/x3$cons)
  x3 <- x3[order(x3$mat), ]
  yldcrva <- matrix(NA, nrow=14, ncol=length(unique(repo$year)))
  for(y in 5:8) yldcrva[, y-4] <- x3$sprd[x3$year == years[y] & x3$sec == 'ags']
  for(y in 5:8) yldcrva[, y] <- x3$sprd[x3$year == years[y] & x3$sec == 'sgs']
  # Now the medians
  x4 <- x[x$year >= 2012 & round(x$simpint, 2) == round(x$simpint, 4), ]
  x4 <- aggregate(sprdsimp ~ year + sec + mat, data=x4, median)
  x4 <- x4[order(x4$mat), ]
  yldcrvm <- matrix(NA, nrow=14, ncol=8)
  for(y in 5:8) yldcrvm[, y-4] <- x4$sprdsimp[x4$year == years[y] & x4$sec == 'ags']
  for(y in 5:8) yldcrvm[, y] <- x4$sprdsimp[x4$year == years[y] & x4$sec == 'sgs']
  if(av) yldcrv <- yldcrva else yldcrv <- yldcrvm
  colnames(yldcrv) <- c(paste(2012:2015, 'ags'), paste(2012:2015, 'sgs'))
  round(100*yldcrv[, c(1, 5, 2, 6, 3, 7, 4, 8)], 2)  # Align data with colnames
}
# Figure 8
mat.share.chart.F <- function(repo, years, trunc, htpair) {
  x <- repo[!(repo$lndent == htpair['lnd'] & repo$brwent == htpair['brw']) & 
              repo$trnsno == 2 & round(repo$simpint, 2) == round(repo$simpint, 4), ]
  # Create maturities excluding weekends
  SDs <- sort(unique(c(floor(x$ST), floor(x$ST2))))
  dint <- 1:length(SDs)  # dint stands for 'day integer'
  x <- cbind(x, mat2=match(floor(x$ST) + x$mat, SDs) - match(floor(x$ST), SDs), 
             sd=floor(x$ST))  # mat2 is business-day maturity
  x2 <- aggregate(cons ~ sd + mat2, data=x, sum)  # Total new repos each day by maturity
  # Replace days with consecutive integers to get rid of weekends etc
  x2 <- cbind(x2, dint=dint[match(x2$sd, SDs)])
  # Create matrix of outstanding each day (in dint) by maturity
  outmat <- matrix(0, nrow=length(SDs), ncol=10)
  rownames(outmat) <- SDs
  for(d in 1:length(SDs)) for(m in 1:10) outmat[d, m] <- 
    sum(x2$cons[x2$mat2 == m & x2$dint <= d & x2$dint + x2$mat2 > d])
  # For each year, average across days that are inside the truncated sample
  outmat2 <- matrix(0, nrow=10, ncol=8)
  colnames(outmat2) <- years
  for(y in 1:8) for(m in 1:10) outmat2[m, y] <- 
    sum(outmat[rownames(outmat) >= trunc[y, 1] & rownames(outmat) <= trunc[y, 2], m])
  # Convert into proportions
  100*outmat2/matrix(rep(colSums(outmat2), 10), nrow=10, byrow=TRUE)
}
# Creating the data for the regressions in Tables 9 and 10 and Figure 11
reg.vars.F <- function(repo, years, data.objects, frn, agg) {
  # agg is TRUE or FALSE. Should virtually identical repos be combined?
  repo <- repo[, colnames(repo) %in% c('year', 'ST', 'lndent', 'brwent', 'sec', 'secpr',
                                       'hct', 'sprdsimp', 'ISIN', 'FV', 
                                       'cons', 'mat', 'simpint', 'trnsno', 'crdec', 
                                       'lnd', 'brw')]
  # Regressions are at accnt level. Keep intra-entity repos but not intra-account repos.
  repo <- repo[repo$brw != repo$lnd, ]
  if(agg) {  # Aggregate virtually identical repos into a single repo
    r2 <- repo
    r2[is.na(r2)] <- Inf
    agdata <- cbind(r2, ST=round(repo$ST, 1), consss=r2$sprdsimp*r2$cons,  # NEED BOTH???? 
                    conssi=r2$simpint*r2$cons, FVhct=r2$hct*r2$FV)
    agsum <- aggregate(cbind(FV, cons, consss, conssi, FVhct) ~ year +
                         ST + lndent + brwent + sec + secpr + ISIN + mat + crdec +
                         lnd + brw, data=agdata, sum)
    agsum[, c('consss', 'conssi', 'FVhct')] <- agsum[, c('consss', 'conssi', 'FVhct')]/
      agsum[, c('cons', 'cons', 'FV')]  # Weight average haircuts/spreads by value
    colnames(agsum)[colnames(agsum) %in% c('consss', 'conssi', 'FVhct')] <- 
      c('sprdsimp', 'simpint', 'hct')
    agmean <- aggregate(trnsno ~ year + ST + lndent + brwent + sec + secpr + ISIN + 
                          mat + crdec + lnd + brw, data=agdata, mean)  # Mean trnsn no
    r2 <- merge(agsum, agmean)  # Put mean trno into aggregated dataset
    r2[r2 == Inf | r2 == -Inf] <- NA
    repo <- r2
  }
  # Pull in OMO data
  omos <- c(0, 0, 0)  # Add zeros for missing entities
  for(y in 1:length(years)) omos <- rbind(omos, data.objects[[y]][[4]])
  brwomo <- omos$Cons[match(paste(floor(repo$ST), repo$brw),
                            paste(omos$SD, omos$Snd), nomatch=1)]
  lndomo <- omos$Cons[match(paste(floor(repo$ST), repo$lnd),
                            paste(omos$SD, omos$Snd), nomatch=1)]
  # Add in whether the security was in a futures settlement basket
  # These were taken from comparing ASX documentation with bond information in 
  # the holdings data.
  setisins <- c(paste(2012, c('AU0000XCLWM5', 'AU3TB0000051', 'AU3TB0000101')), 
                paste(2013, c('AU3TB0000051', 'AU3TB0000101', 'AU3TB0000143',
                              'AU3TB0000168')), 
                paste(2014, c('AU3TB0000101', 'AU3TB0000143', 'AU3TB0000168', 
                              'AU000XCLWAI8')), 
                paste(2015, c('AU3TB0000168', 'AU000XCLWAI8', 'AU3TB0000135')))
  delivsec <- (paste(repo$year, repo$ISIN) %in% setisins)*1
  # Add in whether the repo was likely followed by a short sale
  fn <- function(x) {  # Read in transactions data and remove repos from it
    y <- data.objects[[x]][[1]]
    matched <- c(unlist(y[[1]]), unlist(y[[3]]), abs(unlist(y[[4]][[1]])))
    y <- y[[2]][-matched, ]
    cbind(year=years[x], y[y[, 'Cons'] > 0, ])  # Remove zero cash transactions
  }
  trns <- do.call(rbind, lapply(1:8, fn)) 
  couldbe <- paste(repo$year, repo$lnd, repo$ISIN) %in%  # Narrow it down a bit
    paste(trns$year, trns$Snd, trns$ISIN) & repo$mat > 2
  shrtsale <- rep(0, nrow(repo))
  for(i in which(couldbe)) {  # This takes a while - loops over most of dataset
    shrtsale[i] <- any(trns$Snd == repo$lnd[i] & trns$ISIN == repo$ISIN[i] &
                         trns$ST > repo$ST[i] & trns$ST < repo$ST[i] + 2)
  }
  # Add in entity type.
  cps <- unique(c(as.character(repo$lndent), as.character(repo$brwent)))
  cln <- cps[grepl('1', cps)]
  sgv <- cps[substr(cps, 4, 5) == 'TR']
  brwdom <- rep('aus', nrow(repo))
  brwdom[repo$brwent %in% cln] <- 'cln'
  brwdom[repo$brwent %in% sgv] <- 'sgv'
  brwdom[repo$brwent %in% frn] <- 'frn'
  lnddom <- rep('aus', nrow(repo))
  lnddom[repo$lndent %in% cln] <- 'cln'
  lnddom[repo$lndent %in% sgv] <- 'sgv'
  lnddom[repo$lndent %in% frn] <- 'frn'
  
  day <- floor(repo$ST)
  time <- repo$ST - day
  colnames(repo)[colnames(repo) %in% c('lnd', 'brw')] <- c('lndact', 'brwact')
  cbind(repo, day, time, brwomo, lndomo, brwdom, lnddom, delivsec, shrtsale)
}
# Figure 9
turnovershare.chart.F <- function(year, repo, stt, frn, trim.min=1) {
  # trim.min is the minimum-percentage entity (summing brw & lnd) to include
  x <- repo[as.character(repo$brwent) != as.character(repo$lndent) & 
              repo$year == year & nchar(as.character(repo$lndent)) != 5 &
              nchar(as.character(repo$brwent)) != 5, ]  # 5 chars indicates st gov
  ents <- unique(c(as.character(x$brwent), as.character(x$lndent)))
  lndag <- aggregate(cons ~ lndent, data=x, sum)  # Aggregate turnvover by entity
  lnd <- lndag$cons[match(ents, lndag$lndent)]  # Align with 'ents'
  lnd[is.na(lnd)] <- 0
  lnd <- 100*lnd/sum(lnd)  # Make percentage of total
  brwag <- aggregate(cons ~ brwent, data=x, sum)  # repeat for borrower turnover
  brw <- brwag$cons[match(ents, brwag$brwent)]  
  brw[is.na(brw)] <- 0
  brw <- 100*brw/sum(brw)
  # Add in entity types
  enttyp <- rep(NA, length(ents))
  enttyp[grepl('1', ents)] <- 'cln'
  enttyp[substr(ents, 4, 5) == 'TR'] <- 'sgv'
  enttyp[ents %in% frn] <- 'frn'
  enttyp[is.na(enttyp)] <- 'aus'
  # Put it together
  turnover <- data.frame(ents, enttyp, lnd, brw)
  turnover <- turnover[order(lnd + brw), ]
  turnover[turnover$lnd + turnover$brw >= trim.min, ]
}
# Figure 10
network.graph.2015.F <- function(outst) {
  # NEEDS TO BE RUN ON 14-DAY MATURITY CAP REPO DATA
  outst <- outst[nchar(as.character(outst$lnd)) < 5 &  # Remove state govt entities
                   nchar(as.character(outst$brw)) < 5, ]
  # Average positions across days
  dav <- cbind(outst[, c('lnd', 'brw', 'lndbrw', 'sec')], av=rowMeans(outst[, 5:48]))
  dav <- aggregate(av ~ lnd + brw + lndbrw, data=dav, sum)  # Sum across sec types
  gros.dav <- rep(0, nrow(dav))  # Average gross bilateral exposures
  if(sum(duplicated(dav$av)) > 0) warning('loop will fail to sum equal opposing posns')
  for(i in 1:nrow(dav)) {  # Look for opposite direction bilateral positions
    rev <- which(paste(dav$brw, dav$lnd) %in% dav$lndbrw[i])
    if(length(rev) == 0) {
      gros.dav[i] <- dav$av[i]  
    } else if(dav$av[rev] < dav$av[i]) gros.dav[i] <- dav$av[i] + dav$av[rev]
  }  # If any found sum them to get gross bilateral position
  edges <- cbind(dav, gros.dav)
  edges <- edges[edges$gros.dav > 0, c('lnd', 'brw', 'gros.dav')]
  # Create the nodes object and its characteristics
  cps <- sort(unique(c(as.character(dav$lnd), as.character(dav$brw))))
  # Net lending positions
  lndag <- aggregate(av ~ lnd, data=dav, sum)
  brwag <- aggregate(av ~ brw, data=dav, sum)
  netlndsz <- rep(0, length(cps))
  for(i in 1:length(cps)) {  # Subtract any borrowing from any lending
    if(cps[i] %in% lndag$lnd & cps[i] %in% brwag$brw) {
      netlndsz[i] <- lndag$av[lndag$lnd == cps[i]] - brwag$av[brwag$brw == cps[i]]
    } else {  # Some entities are only lenders or only borrowers
      if(cps[i] %in% lndag$lnd) netlndsz[i] <- lndag$av[lndag$lnd == cps[i]] 
      if(cps[i] %in% brwag$brw) netlndsz[i] <- -brwag$av[brwag$brw == cps[i]]
    }
  }
  netlndsgn <- netlndsz > 0  # Store whether net lender or borrower
  netlndsz <- log(abs(netlndsz)/1e6 + 2)  # CHANGE THIS SCALE TO GET IT LOOKING BETTER
  # Types: 1 Aus, 2 Frn, 3 clnt, 4 State gov
  cptyp <- rep(1, length(cps))
  # This has been calibrated to the particular data. The next line is for dummy data
  frn <- c(3, 5:8, 10, 13, 14, 16, 18, 24:26, 28, 29)
  if(max(frn) > length(cps)) frn <- sample.int(length(cps))[1:(length(cps)/2)]
  cptyp[frn] <- 2
  cptyp[grepl('1', cps)] <- 3
  cptyp[substr(cps, nchar(cps) - 1, nchar(cps)) == 'TR'] <- 4
  nds <- cbind(cps, cptyp, netlndsgn, netlndsz)  # Pull together node data
  # Create graph object
  netgrph <- graph_from_data_frame(d=edges, vertices=nds, directed=FALSE)
  # Node colours
  cptypc <- rep(NA, length(cptyp))
  cptypc[cptyp == 1] <- 'blue'
  cptypc[cptyp == 2] <- 'firebrick'
  cptypc[cptyp == 3] <- 'goldenrod'
  cptypc[cptyp == 4] <- 'darkgreen'
  # Node shapes
  nshp <- rep(NA, length(length(cps)))
  nshp[netlndsgn] <- 'circle'
  nshp[!netlndsgn] <- 'square'
  # Edge characteristics
  E(netgrph)$width <- log(edges$gros.dav/1e7 + 1)  # ALSO CHANGE THIS FOR APPEARANCE
  E(netgrph)$color <- 'gray50'
  V(netgrph)$color <- cptypc
  V(netgrph)$shape <- nshp
  V(netgrph)$size <- netlndsz
  l <- layout_with_fr(netgrph)  # Graph layout
  plot(netgrph, layout=l, edge.arrow.size=1, vertex.label=NA)
}
# Figure 13
intraday.plot.F <- function(repo, year, htpair, rmvhtpair=TRUE) {
  subsamp <- cbind(repo, lb=paste(repo$lndent, repo$brwent), SD=floor(repo$ST), 
                    time=repo$ST - floor(repo$ST))
  subsamp <- subsamp[subsamp$year == year & subsamp$crdec == 0 & 
                         subsamp$trnsno == 2, ]
  if(rmvhtpair) subsamp <- subsamp[!subsamp$lb == paste(htpair['lnd'], htpair['brw']), ]
  # Use residuals from spreads regression, to control for variables
  sprdresids <- summary(lm(sprdsimp ~ sec + lb + mat + SD, data=subsamp))$residuals
  subsamp <- cbind(subsamp, sprdresids,  # Add in buckets for time of day 
                    tmcat=.bincode(subsamp$time, 15/(24*60)*1:96), ones=1)
  volume <- aggregate(ones ~ tmcat, data=subsamp, sum)  # From the chart series
  value <- aggregate(cons ~ tmcat, data=subsamp, sum)
  avsprdct <- aggregate(sprdresids ~ tmcat, data=subsamp, mean)
  avsprd <- aggregate(sprdsimp ~ tmcat, data=subsamp, mean)
  mat <- aggregate(mat ~ tmcat, data=subsamp, mean)
  pltdata <- 0:96
  time <- seq(0, 24, by=0.25)
  D <- length(unique(floor(subsamp$ST)))
  pltdata <- cbind(time=time, bin=pltdata,  # Combine the daily average series
                volume=(volume[, 2]/D)[match(pltdata, volume$tmcat)], 
                value=(value[, 2]/(1e6*D))[match(pltdata, value$tmcat)], 
                sprd=round(100*avsprd[match(pltdata, avsprd$tmcat), 2], 2), 
                sprdcntrl=round(100*avsprdct[match(pltdata, avsprdct$tmcat), 2], 2), 
                maturity=mat[match(pltdata, mat$tmcat), 2])
  pltdata <- pltdata[!(is.na(pltdata[, 'volume']) & is.na(pltdata[, 'value']) & 
                     is.na(pltdata[, 'sprdcntrl']) & is.na(pltdata[, 'sprd']) &
                     is.na(pltdata[, 'maturity'])), ]
  pltdata
}






